
*-------------------------------------------------------------------------------
* Estimate bias-corrected slope coefficient 
* Estimate bias-corrected R-squared
* Plot graphs
* Estimate bias-corrected rents and cd's for each window
*-------------------------------------------------------------------------------

cap log close
log using "$log/15_03_split_sample_check_7stack", text replace

*-----------------------------------
*Create files for analysis
*-----------------------------------


*Merge the split-sample variables for each window
local i_list = "20017 20087 20157"
dis "`i_list'"


foreach i in `i_list' {

	global ver1 = `i' * 10 + 1
	global ver2 = `i' * 10 + 2

	
	use $temp/workers_currentid_year_ver`i', clear
	rename currentid betnr
	collapse (mean) workers, by(betnr)
	rename workers av_worker_years
	
	*sample split 1
	merge 1:1 betnr using $temp/firmfe_value_new_ver${ver1}, keepus(betnr value firm_fe std_V std_fe)
	rename (value firm_fe std_fe std_V) (value1 firm_fe1 std_fe1 std_V1)
	drop _merge
	
	*sample split 2
	merge 1:1 betnr using $temp/firmfe_value_new_ver${ver2}, keepus(betnr value firm_fe std_V std_fe)
	rename (value firm_fe std_fe std_V) (value2 firm_fe2 std_fe2 std_V2)
	drop _merge 
	
	*non-split
	merge 1:1 betnr using $temp/firmfe_value_new_ver`i', keepus(betnr value firm_fe std_V std_fe rent cd)
	keep if inlist(_merge,2,3)
	drop _merge
	gen window = `i'
	
	save $temp/firmfe_value_split_`i', replace
}

clear

*Append the windows
foreach i in `i_list' {
	append using $temp/firmfe_value_split_`i'
}

save $temp/firmfe_value_split_all7stack, replace

*Clean up...
foreach i in `i_list' {	
	cap erase $temp/firmfe_value_split_`i'.dta
}

*-----------------------------------
*Compute the bias-corrected slopes
*-----------------------------------

use $temp/firmfe_value_split_all7stack, clear

*Regression on the panel using non-versioned data 
reghdfe std_fe std_V [aweight = av_worker_years], a(window) vce(cl betnr)
local slope_ols = _b[std_V]

*Method 1: Split-sample IV regression using versioned data (2 ways)
ivregress 2sls std_fe i.window (std_V1=std_V2) [aweight = av_worker_years], vce(cl betnr)
ivregress 2sls std_fe i.window (std_V2=std_V1) [aweight = av_worker_years], vce(cl betnr)

*Method 2: Moment-based adjustment for bias correction 

*First, residualize the window FE
forvalues s = 1/2{
	reghdfe std_fe`s' [aweight = av_worker_years], a(window) resid(std_fe`s'_R)
	reghdfe std_V`s' [aweight = av_worker_years], a(window) resid(std_V`s'_R)
}
reghdfe std_V [aweight = av_worker_years], a(window) resid(std_V_R)
reghdfe std_fe [aweight = av_worker_years], a(window) resid(std_fe_R)

*Compute variance of V* as cov(V1,V2) (residualized)
corr std_V1_R std_V2_R [aweight = av_worker_years], covariance
local var_v_star = r(cov_12)
*Compute variance of V (residualized)
summ std_V_R [aweight = av_worker_years]
local var_v = r(Var)
*Compute bias-corrected slope 
local slope_bc = (`slope_ols'*`var_v')/`var_v_star'
di "Bias-corrected slope (moment-based adjustment)"
di `slope_bc'

global beta = `slope_bc' // for figure

*Regression on the panel using non-versioned data, conditional on the obs in the split-sample regression
qui reghdfe std_V1 std_V2 [aweight = av_worker_years], a(window) vce(cl betnr)
gen insamp = e(sample)
reghdfe std_fe std_V if insamp [aweight = av_worker_years], a(window) vce(cl betnr)
drop insamp 

*------------------------------------------------------------
* Compute bias-corrected R-sq
* R-sq-bc := (Cov(psi_1,V_1))^2/[Cov(psi_1,psi_2)*Cov(V_1,V_2)]
* numerator can also be computed as Cov(psi_2,V_2) or
* Cov(psi,V). Do all three below. 
*------------------------------------------------------------

*Compute covariances
corr std_fe1_R std_V1_R [aweight = av_worker_years], covariance
local num2 = r(cov_12)
corr std_fe1_R std_fe2_R [aweight = av_worker_years], covariance
local cov_fe = r(cov_12)
corr std_V1_R std_V2_R [aweight = av_worker_years], covariance
local cov_v = r(cov_12)

*Construct R-sq-bc
local rsq_bc2 = (`num2')^2/(`cov_fe'*`cov_v')

di "Bias-corrected R-sq estimates"
di `rsq_bc2'

*R-squared without bias correction (removing window FE)
reg std_fe_R std_V_R [aweight = av_worker_years]


*-------------------------------------------------------------------------------
* Step 2: Determine the psi-V slope in binned cells & aggregate to cell level
*-------------------------------------------------------------------------------

*Adjust V to deflate the variance...
*this is to get the adjusted slope right in the graph
gen adj_std_V_R = std_V_R*(`var_v_star'/`var_v')

local V = "std_V_R adj_std_V_R "
foreach v in `V' {
	
	dis as text "`v'"
	
	preserve

		*Bin by percentiles 
		centile `v', centile(1(1)99)

		matrix centiles = J(99,1,.)
		forvalues i = 1/99 {
			matrix centiles[`i',1] = r(c_`i')
		}

		*generate bins
		gen double V_bin=.
		gen double V_binval = .
		gen double V_binsize = .

		replace V_bin=1 if `v'<=centiles[1,1]
		qui sum `v' if V_bin==1
		replace V_binval = r(mean) if V_bin==1
		replace V_binsize = r(N) if V_bin==1

		forvalues i =2/99{
			local j = `i'-1
			replace V_bin=`i' if `v'<=centiles[`i',1] & `v'>centiles[`j',1]
			qui sum `v' if V_bin==`i'
			replace V_binval = r(mean) if V_bin==`i'
			replace V_binsize = r(N) if V_bin==`i'
		}

		replace V_bin=100 if `v'>centiles[99,1]
		qui sum `v' if V_bin==100
		replace V_binval = r(mean) if V_bin==100
		replace V_binsize = r(N) if V_bin==100

		collapse (mean) av_std_fe= std_fe_R ///
		(sd) sd_std_fe= std_fe_R ///
		(mean) V_binval = V_binval ///
		(mean) V_binsize = V_binsize, by(V_bin)

		gen lb_av_std_fe = av_std_fe-sd_std_fe
		gen ub_av_std_fe = av_std_fe+sd_std_fe

		order V_bin V_binval V_binsize av_std_fe ///
		sd_std_fe lb_av_std_fe ub_av_std_fe
		
		*Slope in collapsed data
		twoway (scatter av_std_fe V_binval, mcolor(blue) msymbol(circle_hollow) mlwidth(medthick)) /// 
		(line ub_av_std_fe V_binval, lcolor(red) lpattern(vshortdash)) /// 
		(line lb_av_std_fe V_binval, lcolor(red) lpattern(vshortdash)) /// 
		(lfit av_std_fe V_binval, lcolor(black) lwidth(medthick)), /// 
		ytitle(Employer fixed effect ({&psi})) xtitle(Employer value (V)) legend(off) graphr(color(gs16))
		graph export "$graph/`v'_fe_p1_2_99_7_stack.pdf", replace

		save "$graph/`v'_fe_p1_2_99_7_stack.dta", replace

	restore
}

*------------------------------------------------------------------------------- 
* Create bias-corrected estimates for rents and cds
*------------------------------------------------------------------------------- 

*Linear version (baseline)
gen rent_bc = .
gen cd_bc   = .
 
foreach i in `i_list' {	
	ivregress 2sls firm_fe1 (value1=value2) if window==`i' [aweight = av_worker_years]
	replace rent_bc = _b[value1]*value + _b[_cons] if window == `i'
	replace cd_bc   = firm_fe - rent_bc if window == `i'
}

*Non-linear version 
dis "`i_list'"

gen rent_bc_nl = .
gen cd_bc_nl = .

forv p = 2/4 {
 gen value1_p`p' = value1^(`p')	
 gen value2_p`p' = value2^(`p')	
 }	

foreach i in `i_list' {
	
	ivregress 2sls firm_fe1 (value1 value1_p2 value1_p3 value1_p4 = value2 value2_p2 value2_p3 value2_p4) if window == `i' [aweight = av_worker_years]	
	replace rent_bc_nl = _b[value1]*value + _b[value1_p2]*value^2 + _b[value1_p3]*value^3 + _b[value1_p4]*value^4 if window == `i'
	replace cd_bc_nl   = firm_fe - rent_bc_nl if window == `i'
	
}


*-------------------------------------------------------------------------------
* Output data 
*-------------------------------------------------------------------------------
dis "`i_list'"

foreach i in `i_list' {
	preserve 
	keep if window == `i' 
	keep betnr firm_fe rent_bc cd_bc
	rename rent_bc rents
	rename cd_bc cd
	save "$temp/firmfe_value_new_ver`i'_bc.dta", replace
	restore
}

foreach i in `i_list' {
	preserve 
	keep if window == `i' 
	keep betnr firm_fe rent_bc_nl cd_bc_nl
	rename rent_bc_nl rents
	rename cd_bc_nl cd
	save "$temp/firmfe_value_new_ver`i'_bc_nl.dta", replace
	restore
}


*-------------------------------------------------------------------------------
* Decompose variance in CDs explained by observables
*-------------------------------------------------------------------------------

merge m:1 betnr using "$temp/wz.dta"

drop if _merge == 2
drop _merge

*by betnr and window: merge on state, kreise, sector, industry
*use modal values for the observables in BHP for each betnr
*then:

encode sector, g(sector_num)

reghdfe cd [aweight = av_worker_years], a(window#bula)
reghdfe cd [aweight = av_worker_years], a(window#kreis)
reghdfe cd [aweight = av_worker_years], a(window#sector_num)
reghdfe cd [aweight = av_worker_years], a(window#ind)
reghdfe cd [aweight = av_worker_years], a(window#ind#kreis)

reghdfe cd_bc [aweight = av_worker_years], a(window#bula)
reghdfe cd_bc [aweight = av_worker_years], a(window#kreis)
reghdfe cd_bc [aweight = av_worker_years], a(window#sector_num)
reghdfe cd_bc [aweight = av_worker_years], a(window#ind) residuals(cd_bc_ind_only)
reghdfe cd_bc [aweight = av_worker_years], a(window#ind#kreis) residuals(cd_bc_res)


clear
log close
